#######################################################################################################
##### Create datasets that combine waves 1 and wave 2 and use other external datatsets (cesop and administrative)
##### Note that this part of the code uses data from wave 1 and wave 2
######################################################################################################

rm(list=ls())
library(questionr)
library(car)
library(foreign)
library(xtable)
library(MASS)
library(tidyverse)
library(MatchIt)
library(lubridate)
library(ANOVAreplication)
library(estimatr)
library(mice)

setwd("~/Dropbox/Replication-Making-JoP/") #Set WD for replication
source("Code/functions.R") #auxiliary functions for analysis

#Data for Table A.1 data 
#The table itself is typset in 4_analysis_outputs.R
#This part of the code cannot be replicated because it contains identifying info
#We provide the out.a1.RData instead
if(1==2){#this table uses identified datasets
  
load("Data/sorteados_october2018.Rda")
load("Data/inscritos_october2018.Rda")
winners <- c(nrow(sorteadosf %>% filter(id_edital == "edital03.2011")),
             nrow(sorteadosf %>% filter(id_edital == "edital06.2011")),
             nrow(sorteadosf %>% filter(id_edital == "edital17.2016")),
             nrow(sorteadosf %>% filter(id_edital == "edital20.2016")))
nonwinners <- c(nrow(inscritosf %>% filter(id_edital == "edital03.2011")) - nrow(sorteadosf %>% filter(id_edital == "edital03.2011")),
                nrow(inscritosf %>% filter(id_edital == "edital06.2011")) - nrow(sorteadosf %>% filter(id_edital == "edital06.2011")),
                nrow(inscritosf %>% filter(id_edital == "edital17.2016")) - nrow(sorteadosf %>% filter(id_edital == "edital17.2016")),
                nrow(inscritosf %>% filter(id_edital == "edital20.2016")) - nrow(sorteadosf %>% filter(id_edital == "edital20.2016")))

rm(sorteadosf, inscritosf)

tb.a1a <- matrix(NA, 2, 4)
tb.a1a[1,] <- nonwinners
tb.a1a[2,] <- winners

#Pre-sample early lotteries
load("Data/data_sample_W2.1.RData")
load("Data/data_sample_W2.2.RData")
pre_e <- length(unique(W2.1$cpfstd)) + length(unique(W2.2$cpfstd))

#Pre-sample recent lotteries
load("Data/all_samples1_2.Rda")
pre_r <- length(unique(all_samples$cpfstd))

#Contacted early lotteries
bd <- read_csv("Data/fgv_mcmv_campo.csv")
int_he <- interval(ymd("2020-05-24"), ymd("2020-06-01")) #high-effort interviews
bd <- bd %>% mutate(dates = ymd(data_de_campo),
                    he = ifelse(dates %within%  int_he, 1, 0))
bd <- bd %>% filter(he != 1) #exclude interviews conducted in high-effort period.

contactede <- bd %>% filter(b1 == 1)
contactede <- length(unique(contactede$fieldid))

#Contacted by Recent Lotteries
load("Data/survey_late.Rda")
load("Data/survey_late2.Rda")

all <- bind_rows(survey_late, survey_late2)
contactedr <- all %>% filter(b1 == 1)
contactedr <- length(unique(contactedr$cpfstd))

#Interviewed Early Lotteries
load("Data/surveyW2-Making.Rda")
inte <- length(unique(survey$fieldID))
rm(survey)

#Interviewed Recent Lotteries
load("Data/surveyW1-Making.RData")
intr <- length(unique(survey$fieldID))
rm(survey)

tb.a1b <- matrix(NA, 3, 2)
tb.a1b[,1] <- c(pre_e, contactede, inte)
tb.a1b[,2] <- c(pre_r, contactedr, intr)

out.a1 <- list(outa1a=tb.a1a,
               outa1b=tb.a1b)
comment(out.a1) <- paste("Created in ",Sys.Date(),". Anonymized data for table A.1.",sep="")
save(out.a1,file=paste("Routputs/out-a1.RData",sep=""))
}#end table A.1

# Generate an object with those enrolled in both lotteries in each wave
# This is mentioned in the paper. 
# The text that refers to these data is typeset in 4_analyasis_outputs.R
# This part of the code cannot be ran, we provide tmp_inscritosearlylate.RData for replicaiton
if(1==2){# requires identified data
  # Read the "inscritos" file ########
  load("Data/inscritos_october2018.Rda")	
  
  i03.2011 <- inscritosf %>%  filter(id_edital == "edital03.2011") %>% select(names.clean, cpfstd)   
  i03.2011$edital.03.2011 <- T
  
  i06.2011 <- inscritosf  %>% filter(id_edital == "edital06.2011") %>% select(names.clean, cpfstd)
  i06.2011$edital.06.2011 <- T
  
  i17.2016 <- inscritosf  %>% filter(id_edital == "edital17.2016") %>% select(names.clean, cpfstd)
  i17.2016$edital.17.2016 <- T
  
  i20.2016 <- inscritosf %>% filter(id_edital == "edital20.2016")  %>% select(names.clean,  cpfstd) 
  i20.2016$edital.20.2016 <- T
  
  insc1 <- full_join(i03.2011,i06.2011, by=c("cpfstd","names.clean"))                  
  insc1$w2 <- T
  insc2 <- full_join(i17.2016,i20.2016, by=c("cpfstd","names.clean"))                    
  insc2$w1 <- T
  
  insc <- full_join(insc1,insc2, by=c("cpfstd","names.clean"))
  rm(insc1,insc2)
  #anonymize
  insc$names.clean <- insc$cpfstd <- NULL
  comment(insc) <- paste("Created in ",Sys.Date(),". Anonymized participation data.",sep="")
  save(insc, file="Routputs/out-inscritosearlylate.RData")
}#end inscritosearlylate


# Appendix H: data creation 

load("Data/surveyW1-Making.RData")
w1 <- survey

load("Data/surveyW2-Making.Rda")
w2 <- as.data.frame(survey)

w1 <- w1 %>% mutate(wave2 = 0,
                    ff2003 = rr2003>0,
                    ff2004 = rr2004>0,
                    ff2005 = rr2005>0,
                    ff2006 = rr2006>0,
                    ff2007 = rr2007>0,
                    ff2008 = rr2008>0,
                    ff2009 = rr2009>0,
                    ff2010 = rr2010>0) %>% select(wave2, Z, sexor, idadeW2,
                                                  race_bin,
                                                  cadunico,
                                                  formal_pre,
                                                  av_prel, sampling_prob, id_edital, 
                                                  fieldID, rr2003, 
                                                  rr2004, rr2005, rr2006, rr2007, rr2008, rr2009, 
                                                  rr2010,
                                                  ff2003, ff2004, ff2005, ff2006, ff2007, ff2008, ff2009, 
                                                  ff2010,
                                                  edital03.2011,edital06.2011,edital17.2016,edital20.2016,
                                                  self_index,
                                                  market_index,
                                                  red_index,
                                                  tax_item,
                                                  red_abstract,
                                                  red_concrete,
                                                  effortbetter,
                                                  trustothers,
                                                  successalone,
                                                  moneyimportant,
                                                  govind, 
                                                  competition,
                                                  pea,inc_above1mw,placement,
                                                  kahndeat_pos, kahndeat_blue, cantril,prosp_self)
w2 <- w2 %>% mutate(wave2 = 1, 
                    sampling_prob = 1,
                    ff2003 = r2003>0,
                    ff2004 = r2004>0,
                    ff2005 = r2005>0,
                    ff2006 = r2006>0,
                    ff2007 = r2007>0,
                    ff2008 = r2008>0,
                    ff2009 = r2009>0,
                    ff2010 = r2010>0) %>% select(wave2, Z, sexor, idade,
                                                 race_bin,
                                                 cadunico,
                                                 formal_pre,
                                                 av_prel, sampling_prob, id_edital, 
                                                 fieldID, r2003, 
                                                 r2004, r2005, r2006, r2007, r2008, r2009, 
                                                 r2010,
                                                 ff2003, ff2004, ff2005, ff2006, ff2007, ff2008, ff2009, 
                                                 ff2010,
                                                 edital03.2011,edital06.2011,edital17.2016,edital20.2016,
                                                 self_index,
                                                 market_index,
                                                 red_index,
                                                 tax_item,
                                                 red_abstract,
                                                 red_concrete,
                                                 effortbetter,
                                                 trustothers,
                                                 successalone,
                                                 moneyimportant,
                                                 govind, 
                                                 competition,
                                                 pea,inc_above1mw,placement,
                                                 kahndeat_pos, kahndeat_blue, cantril,prosp_self) %>% rename(rr2003 = r2003, 
                                                                                                             rr2004 = r2004,
                                                                                                             rr2005 = r2005,
                                                                                                             rr2006 = r2006,
                                                                                                             rr2007 = r2007,
                                                                                                             rr2008 = r2008,
                                                                                                             rr2009 = r2009,
                                                                                                             rr2010 = r2010,
                                                                                                             idadeW2 = idade)

allw <- bind_rows(w1, w2)
allw$fe <- factor(allw$id_edital)#not strictly necessary

comment(allw) <- paste("Created in ",Sys.Date(),". Data for macthing analysis in Appendix H.",sep="")
save(allw, file = paste("Routputs/allw.Rda", sep=""))

######################################################################################################
# Compare the samples on pre-treatment covariate balance
#####################################################################################################

# Recreate summary RAIS variables pre-treatment
allw$av_pre <- rowSums(subset(allw,select=c(rr2008,rr2009,rr2010)),na.rm=T)/3  
allw$formal_pre <- rowSums(subset(allw,select=c(rr2008,rr2009,rr2010))!=0)

pre.treat <-  c("idadeW2", "sexor", 
                "race_bin",
                "cadunico",
                "formal_pre",
                "av_prel"
)

bal.labels <- c("Age",
                "Sex (male)",
                "Race (white)",
                "Cadunico",
                "Yrs in Formal Employment", 
                "Av. Formal Wages"
)

#Implements an F-test
the.formula <- formula(paste("wave2~",paste(pre.treat,collapse="+")))
m1 <- lm_robust(the.formula, 
                data = allw, clusters = fieldID,
                weights = 1/sampling_prob,
                se_type = "stata")
#Standard F-test 
the.wald <- lmtest::waldtest(m1,  test = c("F"))

# Compute balance variable by variable (using linear model:lmr)
ballmr <- bind_rows(lapply(1:length(pre.treat), outcomes=pre.treat, FUN = est_lm,
                           treat="wave2",
                           ctrls = NULL, 
                           clusters = "fieldID",
                           the.data=allw)) 
rownames(ballmr) <- pre.treat
print(ballmr)

# Declare unstandardized outcomes to compare
outcomes <- c(
  "effortbetter",
  "trustothers",
  "successalone",
  "moneyimportant",
  "red_abstract",
  "red_concrete",
  "govind",
  "competition",
  "pea",
  "inc_above1mw",
  "placement",
  "kahndeat_pos",
  "kahndeat_blue",
  "cantril",
  "prosp_self"
)

outcomes.labels <-  c("Effort Better", "Trust Others", "Success Alone", 
                      "Money Important", 
                      "Redistribution (Abstract)", 
                      "Redistribution (Concrete)","Govt vs. Individual", 
                      "Comp. is Good"
                      ,"PEA"
                      ,"Inc.>1MW"
                      ,"Self-placement"
                      ,"Pos. Affect"
                      ,"Blue Affect"
                      ,"Cantril"
                      ,"Expectations")


#Implments an F-test
the.formula <- formula(paste("wave2~",paste(outcomes,collapse="+")))
m1 <- lm_robust(the.formula, 
                data = subset(allw,Z==0), ## Controls ONLY
                clusters = fieldID,
                weights = 1/sampling_prob,
                se_type = "stata")
#Standard F-test 
the.waldo <- lmtest::waldtest(m1,  test = c("F"))
print(the.waldo)

# Compute balance variable by variable (using linear model:lmr)
ballmro <- bind_rows(lapply(1:length(outcomes), 
                           outcomes=outcomes, FUN = est_lm,
                           treat="wave2",ctrls = NULL, the.data=subset(allw,Z==0)))
rownames(ballmro) <- c(outcomes)
print(ballmro)

out.bal.waves <- list(ftest = the.wald, 
                      bal = ballmr, 
                      ftesto = the.waldo, 
                      balo = ballmro)
comment(out.bal.waves) <- paste("Estimated in ",Sys.Date(),". Comparing samples balance and outcomes",sep="")
save(out.bal.waves,file=paste("Routputs/out-bal-waves.RData",sep=""))

######################################################################################################
# Predict outcomes in the control group of each wave
#####################################################################################################

## Re-compute the standardized outcome indices on the pooled set of both waves
## Creating new indices names 
set.seed(1977)
red_index_vars <- imp.mice(allw %>% select(tax_item, red_abstract, red_concrete)) 
market_index_vars <- imp.mice(allw %>% select(effortbetter,
                                              trustothers,
                                              successalone,
                                              moneyimportant)) 
self_index_vars <- imp.mice(allw %>% select(govind, competition)) 
hap_index_vars <- imp.mice(allw %>% 
                             select(kahndeat_pos, kahndeat_blue, cantril) %>%
                             mutate(kahndeat_blue=-1*kahndeat_blue)) 

allw <- allw %>% bind_cols(
  red_index_ctrl_p = calculate_mean_effects_index(Z = allw$Z
                                           , outcome_mat = red_index_vars, greedy = TRUE ), 
  market_index_ctrl_p = calculate_mean_effects_index(Z = allw$Z
                                              , outcome_mat = market_index_vars, greedy = TRUE ), 
  self_index_ctrl_p = calculate_mean_effects_index(Z = allw$Z      
                                            , outcome_mat = self_index_vars, greedy = TRUE ),
  hap_index_ctrl_p =  calculate_mean_effects_index(Z = allw$Z
                                            , outcome_mat = hap_index_vars,  greedy = TRUE ))

attitudes_index_vars <- imp.mice(allw %>% select(tax_item,
                                                 red_abstract,
                                                 red_concrete,
                                                 effortbetter,
                                                 trustothers,
                                                 successalone,
                                                 moneyimportant,
                                                 govind, 
                                                 competition))
allw <- allw %>% bind_cols(
  attitudes_index_ctrl_p = calculate_mean_effects_index(Z = allw$Z
                                                 , outcome_mat = attitudes_index_vars, greedy = TRUE ))


# Create welfare index 
welfare_index_vars <- imp.mice(allw %>% select(pea,inc_above1mw,placement))
allw <- allw %>% bind_cols(welfare_index_ctrl_p = calculate_mean_effects_index(Z = allw$Z
                                               , outcome_mat = welfare_index_vars, greedy = TRUE ))


outcomes <- c(
  "attitudes_index_ctrl_p",
  "red_index_ctrl_p",
  "self_index_ctrl_p",
  "market_index_ctrl_p",
  "welfare_index_ctrl_p",
  "pea",
  "inc_above1mw",
  "placement",
  "prosp_self",
  "hap_index_ctrl_p"
)

pre.treat <-  c("idadeW2", "sexor", 
                "race_bin",
                "cadunico",
                "formal_pre",
                "av_prel"
)

bal.labels <- c("Age",
                "Sex (male)",
                "Race (white)",
                "Cadunico",
                "Yrs in Formal Employment", 
                "Av. Formal Wages")

#Subset the data to compare keep only control group of each wave
pooled.ctrls <- subset(allw,Z==0)

preds <- bind_rows(lapply(1:length(outcomes)
                          ,FUN = pred_out
                          ,treat="wave2"
                          ,fe="id_edital"
                          ,outcomes=outcomes
                          ,ctrls=pre.treat
                          ,clusters="fieldID"
                          ,w.var="sampling_prob"
                          ,the.data=pooled.ctrls))

comment(preds) <- paste("Computed in ",Sys.Date(),". Comparison of control groups between waves.",sep="")
save(preds, file = "Routputs/preds-comparing.Rda")

####################################################################################
# Analysis of W2 Sample Matched to Resemble W1
# We re-estimates selected Pooled Effects                                     ######
# Estimation (2 estimators ITT-lin, ITT-lm)                                   ######
# Summary tables (combining selected ITT)                                     ######
####################################################################################

allw <- subset(allw,select=c(idadeW2,sexor,race_bin,cadunico,formal_pre,av_prel,
                             Z,fieldID,id_edital,wave2))
allw$wave1 <- 1-allw$wave2 #invert treatment so to have more controls
allw$uniqueID <- paste(allw$id_edital,allw$fieldID,sep=".")
formula.M <- formula(wave1~
                       idadeW2+sexor+race_bin+cadunico+formal_pre+av_prel)

#Attention###
set.seed(1977)
m <- matchit(formula.M,replace=T,ratio=3,data=allw,exact="Z") 
summary(m)
plot(summary(m,standardize=TRUE),interactive=F)
plot(m,type= "jitter",interactive=F)
plot(m,type= "hist",interactive=F)
allm <- match.data(m)

## Description
summary(m)
table(allm$weights)
cat("In this matching exercize we keep all "
    ,summary(m)$nn[2,"Treated"]," observations in W1 and "
    ,summary(m)$nn[2,"Control"]," of " 
    ,summary(m)$nn[1,"Control"]," observations in W2.\n", sep="")

## Save weights with some ID that allows it to be merged into the original data set
matching_weights <- subset(allm,select=c(uniqueID,fieldID,id_edital,weights,wave2))
save(matching_weights, file = "Routputs/matching_weights.RData")

####################################################################################
# Analysis of W2 Sample Matched to Resemble W1
# We re-estimates selected Pooled Effects (using matching weights, above)                                  ######
# Estimation (2 estimators ITT-lin, ITT-lm)                                   ######
# Summary tables (combining selected ITT)                                     ######
####################################################################################

load("Routputs/matching_weights.RData")
load("Routputs/data-W2-Making-recoded.Rda")
print(comment(surveyw2))

matching_weights <- subset(matching_weights,wave2==1)
w2matched <-  subset(matching_weights,wave2==1) %>% left_join(surveyw2, by = c("fieldID","id_edital"))

# Declare labels for control and balance variables
controls <- pre.treat <-  c("idade", 
                            "sexor", 
                            "race_bin", 
                            "cadunico",
                            "formal_pre",
                            "av_prel")

bal.labels <- c("Age",
                "Sex (male)",
                "Race (white)", 
                "Registry (Cadunico)",
                "Years in Formal Employment", 
                "Avg. Formal Wages")

outcomes1 <- c(
  "attitudes_index",
  "self_index",
  "market_index",
  "red_index")

outcomes2 <- c(
  "welfare_index",
  "placement",
  "pea",
  "inc_above1mw")

outcomes3 <- c("hap_index")
outcomes4 <- c("prosp_self") 

#These definitions are needed below, in different parts of the code
uO <- length(grep("outcomes\\d",ls()))
the.fam <- paste("outcomes",1:uO,sep="")
outcomes <- do.call("c",sapply(paste("outcomes",1:uO,sep=""),"get"))
out.class <- paste("H",1:uO,sep="")
names(outcomes)<-NULL
out.selected <- which(is.element(outcomes,c("attitudes_index","welfare_index","hap_index","prosp_self")))
the.labels <-  c("Attitudes Index","Self-reliance",
                 "Market Beliefs","Redistribution",
                 "Welfare Index"
                 ,"Placement"
                 ,"PEA"
                 ,"Inc.>1MW"
                 ,"Happiness"
                 ,"Expectations")

# Check whether labels are correct
main <- rep(0,length(outcomes))
main[out.selected] <- 1
var.labels<- data.frame(outcomes,the.labels,main)
print(var.labels)

####  ITT-Lin: two variants  ####
#out.lin       & (no age, no controls, with edital as covariates and survey weights)
#out.linc      & (with age and controls, with edital as covariates and survey weights)(*)
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital")
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=w2matched
                        ,w=w2matched$weights))
rownames(tmp) <- outcomes
out.linw <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.linw$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.linw[get(the.fam[ii]),"HBp"] <- p.adjust(out.linw[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.linw$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= c("id_edital",pre.treat)
                        ,clusters="fieldID"
                        ,FUN = est_lin, the.data=w2matched
                        ,w=w2matched$weights))
rownames(tmp) <- outcomes
out.lincw <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lincw$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lincw[get(the.fam[ii]),"HBp"] <- p.adjust(out.lincw[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lincw$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.linw <- list(out.linw=out.linw,out.lincw=out.lincw)  

####  ITT-Lm: two variants  #### 
#out.lm        & (no age, no controls, with edital as covariates and survey weights)
#out.lmc       & (with age and controls, with edital as covariates and survey weights)(*) 
tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= NULL
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=w2matched
                        ,w=w2matched$weights))
rownames(tmp) <- outcomes
out.lmw <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmw$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmw[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmw[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmw$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

tmp <- bind_rows(lapply(1:length(outcomes)
                        ,outcomes= outcomes
                        ,ctrls= pre.treat
                        ,fe = "id_edital"
                        ,clusters="fieldID"
                        ,FUN = est_lm, the.data=w2matched
                        ,w=w2matched$weights))
rownames(tmp) <- outcomes
out.lmcw <- as.data.frame(tmp)[,c("Estimate","Std. Error","Pr(>|t|)","N","Std. Estimate")]                
#Compute HB corrected p-values by hypothesis family
out.lmcw$HBp <- NA 
for(ii in 1:length(the.fam)){
  out.lmcw[get(the.fam[ii]),"HBp"] <- p.adjust(out.lmcw[get(the.fam[ii]),"Pr(>|t|)"],"BH")
}
out.lmcw$HBp[which(main==1)] <- NA #main outcomes are not subject to correction per PAP

out.itt.lmw <- list(out.lmw=out.lmw,out.lmcw=out.lmcw)  

### Collect all results (list of lists)
out.allw <- list(out.itt.lmw=out.itt.lmw,out.itt.linw=out.itt.linw)
comment(out.allw) <- paste("Estimated in ",Sys.Date(),". Using dataset matched between waves.",sep="")
save(out.allw,file=paste("Routputs/out-W2-matchedestimates.RData",sep=""))


### Creating Figure from CESOP data

d <-  read.spss("Data/CESOP/04683/04683.SAV"
                ,to.data.frame=T
                ,add.undeclared.levels = c("sort")
                ,duplicated.value.labels = c("append"))
names(d) <- tolower(names(d))

### Recode income and a few others to predict missing income

d$fam.income <- factor(car::recode(d$rend2,"'ATÉ 1'='00.01';
                                    'MAIS DE 1 A 2'='01.02';
                                   'MAIS DE 2 A 5'='02.05';
                                   'MAIS DE 5 A 10'='05.10';
                                   c('MAIS DE 10 A 20','MAIS DE 20')='10.'; 
                                   'NÃO TEM RENDIMENTO PESSOAL'='00.01';
                                   'NÃO RESPONDEU'=NA"),ordered=T)
d$ind.income <- factor(car::recode(d$rend1,"'ATÉ 1'='00.01';
                                   'MAIS DE 1 A 2'='01.02';
                                   'MAIS DE 2 A 5'='02.05';
                                   'MAIS DE 5 A 10'='05.10';
                                   c('MAIS DE 10 A 20','MAIS DE 20')='10.';
                                   'NÃO TEM RENDIMENTO PESSOAL'='00.01';
                                   'NÃO RESPONDEU'=NA"),ordered=T)


d$pent <- d$p45=="Assembléia de Deus"|
  d$p45=="Universal do Reino de Deus"|
  d$p45=="Deus é Amor"|
  d$p45=="Evangelho Quadrangular"|
  d$p45=="Igreja Internacional da Graça"|
  d$p45=="Renascer em Cristo"|
  d$p45=="Sara nossa terra"|
  d$p45=="Outras Evangélicas específicas"|
  d$p45=="Evangélica - Não sabe especificar"

d$white <- d$p44=="Branca"|d$p44=="Amarela"

### Do individuals or family members participate in BF?
### Individuals could list up to 8 social programs (eight variables)
### And for family members they could list up to 5
### So, we merge the 8 into one, and the 5 into one indicating participation in BF only

d$bf.family <- F
bf.a <- c(grep("Bolsa Família",d$p46a01),
          grep("Bolsa Família",d$p46a02),
          grep("Bolsa Família",d$p46a03),
          grep("Bolsa Família",d$p46a04),
          grep("Bolsa Família",d$p46a05),
          grep("Bolsa Família",d$p46a06),
          grep("Bolsa Família",d$p46a07),
          grep("Bolsa Família",d$p46a08))
bf.b <- c(grep("Bolsa Família",d$p46b01),
          grep("Bolsa Família",d$p46b02),
          grep("Bolsa Família",d$p46b03),
          grep("Bolsa Família",d$p46b04),
          grep("Bolsa Família",d$p46b05))

d$bf.family [union(bf.a,bf.b)] <- T
d$superior <- d$inst=="SUP. INC."|d$inst=="SUP. COMP."
d$analfa <- d$inst=="ANALF."
d$N.NE <- d$reg=="NORTE"|d$reg=="NORDESTE"
d$capital <- d$condx=="CAPITAL"

# Predicting regression (not using individual income because there's NAs there too)
pred.reg <- polr(fam.income~pent+white+bf.family+N.NE+capital+analfa+superior, d, method = c("logistic"))
summary(pred.reg)

# Predict missing income observations
d$fam.income.imp <- d$fam.income
d$fam.income.imp[is.na(d$fam.income)]<-   predict(pred.reg,newdata = d[is.na(d$fam.income),])
d$fam.income.imp <- factor(d$fam.income.imp,ordered=F) #remover ordering

## Evaluation of social programs
## Programs are grouped into three separate questions P13, P14, P15
## Here we select only the eight main programs, listed in the previous questions
d<- rename.variable(d,"p1501","ev.bf") 
d<- rename.variable(d,"p1502","ev.mcmv")
d<- rename.variable(d,"p1402","ev.farmpop")
d<- rename.variable(d,"p1301","ev.enem")
d<- rename.variable(d,"p1304","ev.pronatec")
d<- rename.variable(d,"p1401","ev.mm")
d<- rename.variable(d,"p1303","ev.fies")
d<- rename.variable(d,"p1302","ev.prouni")
eval.vars <- c("ev.mcmv","ev.bf","ev.prouni","ev.pronatec"
               ,"ev.enem","ev.farmpop","ev.fies","ev.mm")

d[,eval.vars] <- sapply(d[,eval.vars], function(x){as.numeric(nsnr(x))})

# Average evaluation
selected <- d[,eval.vars]
ev.mean <- apply(selected,2,mean,na.rm=T)

# Dispersion
ev.sd <- apply(selected,2,sd,na.rm=T)

all.regs <- lapply(1:length(eval.vars),all.lm)
names(all.regs)<- gsub("ev.","",eval.vars)

#Collect
out.all.cesop <- list(all.regs=all.regs, 
                      ev.mean = ev.mean)
comment(out.all.cesop) <- paste("Estimated in ",Sys.Date(),". Public Opinion Data.",sep="")
save(out.all.cesop,file=paste("Routputs/out-cesop.RData",sep=""))
